1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)

op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

1.2 The Data

DARWIN <- read.csv("~/GitHub/FCA/Data/DARWIN/DARWIN.csv")
rownames(DARWIN) <- DARWIN$ID
DARWIN$ID <- NULL
DARWIN$class <- 1*(DARWIN$class=="P")
print(table(DARWIN$class))
#> 
#>  0  1 
#> 85 89

DARWIN[,1:ncol(DARWIN)] <- sapply(DARWIN,as.numeric)

signedlog <- function(x) { return (sign(x)*log(abs(1.0e12*x)+1.0))}
whof <- !(colnames(DARWIN) %in% c("class"));
DARWIN[,whof] <- signedlog(DARWIN[,whof])

1.2.0.1 Standarize the names for the reporting

studyName <- "DARWIN"
dataframe <- DARWIN
outcome <- "class"

TopVariables <- 10

thro <- 0.80
cexheat = 0.15

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
174 450
pander::pander(table(dataframe[,outcome]))
0 1
85 89

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) >= 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9994118

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 450 , Uni p: 0.006594985 , Uncorrelated Base: 139 , Outcome-Driven Size: 0 , Base Size: 139 
#> 
#> 
 1 <R=0.999,r=0.975,N=  147>, Top: 47( 1 )[ 1 : 47 Fa= 46 : 0.975 ]( 46 , 72 , 0 ),<|>Tot Used: 118 , Added: 72 , Zero Std: 0 , Max Cor: 0.999
#> 
 2 <R=0.999,r=0.975,N=  147>, Top: 7( 1 )[ 1 : 7 Fa= 53 : 0.975 ]( 7 , 17 , 46 ),<|>Tot Used: 140 , Added: 17 , Zero Std: 0 , Max Cor: 0.996
#> 
 3 <R=0.996,r=0.973,N=  147>, Top: 10( 1 )[ 1 : 10 Fa= 60 : 0.973 ]( 10 , 10 , 53 ),<|>Tot Used: 155 , Added: 10 , Zero Std: 0 , Max Cor: 0.973
#> 
 4 <R=0.973,r=0.936,N=   91>, Top: 41( 3 )[ 1 : 41 Fa= 92 : 0.936 ]( 40 , 49 , 60 ),<|>Tot Used: 226 , Added: 49 , Zero Std: 0 , Max Cor: 0.956
#> 
 5 <R=0.956,r=0.928,N=   91>, Top: 10( 1 )[ 1 : 10 Fa= 98 : 0.928 ]( 10 , 10 , 92 ),<|>Tot Used: 239 , Added: 10 , Zero Std: 0 , Max Cor: 0.927
#> 
 6 <R=0.927,r=0.914,N=   91>, Top: 11( 1 )[ 1 : 11 Fa= 103 : 0.914 ]( 10 , 10 , 98 ),<|>Tot Used: 246 , Added: 10 , Zero Std: 0 , Max Cor: 0.912
#> 
 7 <R=0.912,r=0.906,N=   91>, Top: 5( 1 )[ 1 : 5 Fa= 103 : 0.906 ]( 5 , 5 , 103 ),<|>Tot Used: 247 , Added: 5 , Zero Std: 0 , Max Cor: 0.906
#> 
 8 <R=0.906,r=0.853,N=   90>, Top: 42( 1 )[ 1 : 42 Fa= 135 : 0.853 ]( 40 , 44 , 103 ),<|>Tot Used: 313 , Added: 44 , Zero Std: 0 , Max Cor: 0.894
#> 
 9 <R=0.894,r=0.847,N=   90>, Top: 10( 1 )[ 1 : 10 Fa= 137 : 0.847 ]( 10 , 10 , 135 ),<|>Tot Used: 317 , Added: 10 , Zero Std: 0 , Max Cor: 0.845
#> 
 10 <R=0.845,r=0.823,N=   90>, Top: 18( 1 )[ 1 : 18 Fa= 142 : 0.823 ]( 16 , 16 , 137 ),<|>Tot Used: 330 , Added: 16 , Zero Std: 0 , Max Cor: 0.926
#> 
 11 <R=0.926,r=0.863,N=   90>, Top: 1( 1 )[ 1 : 1 Fa= 142 : 0.863 ]( 1 , 1 , 142 ),<|>Tot Used: 330 , Added: 1 , Zero Std: 0 , Max Cor: 0.822
#> 
 12 <R=0.822,r=0.800,N=   27>, Top: 14( 1 )[ 1 : 14 Fa= 149 : 0.800 ]( 13 , 13 , 142 ),<|>Tot Used: 343 , Added: 13 , Zero Std: 0 , Max Cor: 0.837
#> 
 13 <R=0.837,r=0.800,N=   27>, Top: 2( 1 )[ 1 : 2 Fa= 150 : 0.800 ]( 2 , 2 , 149 ),<|>Tot Used: 344 , Added: 2 , Zero Std: 0 , Max Cor: 0.797
#> 
 14 <R=0.797,r=0.800,N=    0>
#> 
 [ 14 ], 0.7971926 Decor Dimension: 344 Nused: 344 . Cor to Base: 184 , ABase: 89 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

692

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

122

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

4.57

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

4.59

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.7971926

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")

100 : mean_jerk_in_air6 200 : disp_index12 300 : mean_speed_in_air17 400 : gmrt_on_paper23




univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

100 : La_mean_jerk_in_air6 200 : La_disp_index12 300 : La_mean_speed_in_air17 400 : La_gmrt_on_paper23

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
total_time23 37.2 0.503 36.7 0.484 1.03e-05 0.863
total_time15 38.1 0.875 37.1 0.421 5.44e-01 0.844
air_time23 36.6 0.626 35.9 0.656 6.92e-03 0.844
air_time15 37.7 1.094 36.6 0.615 5.06e-01 0.829
total_time17 38.5 0.681 37.8 0.614 4.00e-03 0.824
paper_time23 36.4 0.439 36.0 0.231 6.72e-01 0.814
air_time17 37.9 0.914 37.0 0.795 3.52e-02 0.806
paper_time17 37.6 0.395 37.2 0.439 1.28e-03 0.796
total_time6 37.1 0.777 36.4 0.447 7.16e-01 0.790
air_time16 36.4 1.131 35.2 0.867 9.38e-01 0.787


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
air_time23 36.612 0.626 35.85799 0.656 6.92e-03 0.844
air_time15 37.720 1.094 36.60743 0.615 5.06e-01 0.829
air_time17 37.912 0.914 37.00002 0.795 3.52e-02 0.806
air_time16 36.357 1.131 35.24001 0.867 9.38e-01 0.787
disp_index23 16.116 0.194 15.92591 0.166 3.43e-01 0.787
air_time6 36.694 0.899 35.81144 0.665 7.39e-01 0.784
air_time7 36.742 0.758 36.09042 0.938 5.42e-04 0.779
La_paper_time3 0.284 0.391 -0.00243 0.190 1.12e-01 0.776
gmrt_in_air7 32.948 0.405 33.38157 0.396 9.99e-01 0.775
air_time2 36.256 1.176 35.08828 1.002 2.05e-01 0.773
La_mean_speed_on_paper2 -0.121 0.210 0.00476 0.155 6.60e-05 0.730
La_mean_speed_on_paper3 -0.125 0.239 0.03519 0.147 8.09e-05 0.727
La_mean_acc_on_paper3 0.262 0.387 -0.01629 0.297 4.33e-01 0.721
La_mean_speed_on_paper13 -5.051 0.068 -5.01659 0.107 2.18e-05 0.718

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.17 222 0.493


allSigvars <- names(dc)



dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
air_time23 36.612 0.626 35.85799 0.656 6.92e-03 0.844 0.844 1
air_time15 37.720 1.094 36.60743 0.615 5.06e-01 0.829 0.829 1
air_time17 37.912 0.914 37.00002 0.795 3.52e-02 0.806 0.806 1
air_time16 36.357 1.131 35.24001 0.867 9.38e-01 0.787 0.787 1
disp_index23 16.116 0.194 15.92591 0.166 3.43e-01 0.787 0.787 1
air_time6 36.694 0.899 35.81144 0.665 7.39e-01 0.784 0.784 1
air_time7 36.742 0.758 36.09042 0.938 5.42e-04 0.779 0.779 1
La_paper_time3 -0.954disp_index3 + 1.000paper_time3 -0.596*pressure_mean3 0.284 0.391 -0.00243 0.190 1.12e-01 0.776 0.715 -2
gmrt_in_air7 32.948 0.405 33.38157 0.396 9.99e-01 0.775 0.775 1
air_time2 36.256 1.176 35.08828 1.002 2.05e-01 0.773 0.773 1
La_mean_speed_on_paper2 -0.876gmrt_on_paper2 + 1.000mean_speed_on_paper2 -0.121 0.210 0.00476 0.155 6.60e-05 0.730 0.720 -1
La_mean_speed_on_paper3 -0.877gmrt_on_paper3 + 1.000mean_speed_on_paper3 + 0.000*pressure_mean3 -0.125 0.239 0.03519 0.147 8.09e-05 0.727 0.291 -2
La_mean_acc_on_paper3 + 1.000mean_acc_on_paper3 -0.718pressure_mean3 0.262 0.387 -0.01629 0.297 4.33e-01 0.721 0.691 0
mean_speed_on_paper2 NA 27.813 0.492 27.88786 3.091 5.50e-13 0.720 0.720 NA
La_mean_speed_on_paper13 -1.035gmrt_on_paper13 + 1.000mean_speed_on_paper13 -5.051 0.068 -5.01659 0.107 2.18e-05 0.718 0.626 -1
paper_time3 NA 36.536 0.657 35.35856 5.538 3.33e-16 0.715 0.715 NA
mean_acc_on_paper3 NA 25.438 0.310 24.65355 3.859 0.00e+00 0.691 0.691 NA
disp_index3 NA 16.086 0.639 15.59248 2.469 4.98e-13 0.669 0.669 4
mean_speed_on_paper13 NA 28.413 0.557 28.66694 0.421 9.93e-01 0.626 0.626 NA
gmrt_on_paper13 NA 32.328 0.536 32.54115 0.400 9.81e-01 0.606 0.606 2
gmrt_on_paper3 NA 32.296 0.519 31.78783 4.982 3.33e-16 0.358 0.358 NA
pressure_mean3 NA 35.043 0.300 34.33865 5.363 0.00e+00 0.341 0.341 NA
gmrt_on_paper2 NA 31.879 0.494 31.82043 3.524 2.06e-13 0.337 0.337 NA
mean_speed_on_paper3 NA 28.191 0.528 27.90558 4.378 6.66e-16 0.291 0.291 NA

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 78 7
1 7 82
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.5115 0.4347 0.588
    tp 0.5115 0.4347 0.588
    se 0.9213 0.8446 0.968
    sp 0.9176 0.8377 0.966
    diag.ac 0.9195 0.8687 0.955
    diag.or 130.5306 43.7750 389.223
    nndx 1.1919 1.0706 1.466
    youden 0.8390 0.6823 0.934
    pv.pos 0.9213 0.8446 0.968
    pv.neg 0.9176 0.8377 0.966
    lr.pos 11.1878 5.4882 22.807
    lr.neg 0.0857 0.0420 0.175
    p.rout 0.4885 0.4121 0.565
    p.rin 0.5115 0.4347 0.588
    p.tpdn 0.0824 0.0338 0.162
    p.tndp 0.0787 0.0322 0.155
    p.dntp 0.0787 0.0322 0.155
    p.dptn 0.0824 0.0338 0.162
  • tab:

      Outcome + Outcome - Total
    Test + 82 7 89
    Test - 7 78 85
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.920 0.869 0.955
3 se 0.921 0.845 0.968
4 sp 0.918 0.838 0.966
6 diag.or 130.531 43.775 389.223

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 80 5
1 10 79
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.4828 0.4065 0.560
    tp 0.5115 0.4347 0.588
    se 0.8876 0.8031 0.945
    sp 0.9412 0.8680 0.981
    diag.ac 0.9138 0.8618 0.951
    diag.or 126.4000 41.3399 386.478
    nndx 1.2065 1.0806 1.490
    youden 0.8288 0.6711 0.925
    pv.pos 0.9405 0.8665 0.980
    pv.neg 0.8889 0.8051 0.945
    lr.pos 15.0899 6.4267 35.431
    lr.neg 0.1194 0.0664 0.215
    p.rout 0.5172 0.4404 0.594
    p.rin 0.4828 0.4065 0.560
    p.tpdn 0.0588 0.0194 0.132
    p.tndp 0.1124 0.0552 0.197
    p.dntp 0.0595 0.0196 0.133
    p.dptn 0.1111 0.0546 0.195
  • tab:

      Outcome + Outcome - Total
    Test + 79 5 84
    Test - 10 80 90
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.914 0.862 0.951
3 se 0.888 0.803 0.945
4 sp 0.941 0.868 0.981
6 diag.or 126.400 41.340 386.478

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 65 20
1 2 87
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.6149 0.53831 0.6876
    tp 0.5115 0.43471 0.5879
    se 0.9775 0.92117 0.9973
    sp 0.7647 0.66026 0.8500
    diag.ac 0.8736 0.81485 0.9190
    diag.or 141.3750 31.90534 626.4434
    nndx 1.3473 1.18032 1.7199
    youden 0.7422 0.58144 0.8472
    pv.pos 0.8131 0.72619 0.8819
    pv.neg 0.9701 0.89629 0.9964
    lr.pos 4.1545 2.82823 6.1027
    lr.neg 0.0294 0.00743 0.1163
    p.rout 0.3851 0.31241 0.4617
    p.rin 0.6149 0.53831 0.6876
    p.tpdn 0.2353 0.15004 0.3397
    p.tndp 0.0225 0.00273 0.0788
    p.dntp 0.1869 0.11807 0.2738
    p.dptn 0.0299 0.00364 0.1037
  • tab:

      Outcome + Outcome - Total
    Test + 87 20 107
    Test - 2 65 67
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.874 0.815 0.919
3 se 0.978 0.921 0.997
4 sp 0.765 0.660 0.850
6 diag.or 141.375 31.905 626.443


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 82 3
1 10 79
  pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.4713 0.39528 0.5482
    tp 0.5115 0.43471 0.5879
    se 0.8876 0.80306 0.9448
    sp 0.9647 0.90030 0.9927
    diag.ac 0.9253 0.87563 0.9596
    diag.or 215.9333 57.29883 813.7550
    nndx 1.1732 1.06673 1.4217
    youden 0.8523 0.70336 0.9374
    pv.pos 0.9634 0.89678 0.9924
    pv.neg 0.8913 0.80918 0.9466
    lr.pos 25.1498 8.25615 76.6112
    lr.neg 0.1165 0.06486 0.2091
    p.rout 0.5287 0.45175 0.6047
    p.rin 0.4713 0.39528 0.5482
    p.tpdn 0.0353 0.00734 0.0997
    p.tndp 0.1124 0.05521 0.1969
    p.dntp 0.0366 0.00761 0.1032
    p.dptn 0.1087 0.05337 0.1908
  • tab:

      Outcome + Outcome - Total
    Test + 79 3 82
    Test - 10 82 92
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.925 0.876 0.960
3 se 0.888 0.803 0.945
4 sp 0.965 0.900 0.993
6 diag.or 215.933 57.299 813.755
  par(op)